home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Languguage OS 2
/
Languguage OS II Version 10-94 (Knowledge Media)(1994).ISO
/
language
/
ici
/
ici.cpi
/
lex.c
< prev
next >
Wrap
C/C++ Source or Header
|
1994-10-27
|
10KB
|
679 lines
#include "parse.h"
#include "file.h"
#include "buf.h"
#include "src.h"
#include "array.h"
#ifndef NOTRACE
#include "trace.h"
#endif
/*
 * Shared scratch buffer, grown on demand by growbuf(). It is NULL until
 * growbuf() has allocated it for the first time.
 */
char *buf;
/*
 * Size bookkeeping for buf. growbuf() sets this to one less than the
 * number of bytes it allocated.
 */
int bufz;
/*
 * Make sure the scratch buffer buf is big enough that bufz > n.
 *
 * If it already is, nothing happens. Otherwise a new block of (n + 2) * 2
 * bytes is obtained from zalloc(), the first bufz bytes of the old buffer
 * (if there is one) are copied across, the old buffer is released with
 * zfree() and bufz is set to one less than the new allocation size.
 *
 * Returns 0 on success, 1 if zalloc() returned NULL (in which case buf
 * and bufz are left untouched).
 */
int
growbuf(n)
register int n;
{
    register char *fresh;
    int nbytes;

    if (n < bufz)
        return 0;

    nbytes = (n + 2) * 2;
    fresh = zalloc(nbytes);
    if (fresh == NULL)
        return 1;

    if (buf != NULL)
    {
        /* Carry the old contents over before letting the old block go. */
        memcpy(fresh, buf, bufz);
        zfree(buf);
    }
    bufz = nbytes - 1;
    buf = fresh;
    return 0;
}
#ifndef NOTRACE
/*
 * Set to 1 by unget() and reset to 0 by the next call to get(). While it
 * is set, get() skips its lexer trace output for the character it returns.
 */
static int ungotten = 0;
#endif
/*
 * Fetch the next character from the file being parsed by p.
 *
 * Both '\r' and '\n' are returned as '\n', and each one bumps p->p_lineno,
 * except that a '\n' arriving directly after a '\r' is swallowed and the
 * character following it is processed instead. p->p_sol is left non-zero
 * when the character returned was a newline and zero otherwise; p->p_cr
 * records whether the last newline seen was a '\r'.
 *
 * When p->p_depth > 0 and a is not NULL, the source marker on top of the
 * array a is updated with the current line number, or a new marker is
 * pushed if the top element is not one.
 *
 * Unless NOTRACE is defined, characters are echoed to stderr when lexer
 * tracing is switched on, except for the first get() after an unget().
 */
int
get(p, a)
register parse_t *p;
register array_t *a;
{
    int ch;

    ch = (*p->p_file->f_type->ft_getch)(p->p_file->f_file);
    if (ch != '\n' && ch != '\r')
    {
        /* An ordinary character (or EOF); we are no longer at start of line. */
        p->p_sol = 0;
    }
    else if (ch == '\n' && p->p_sol && p->p_cr)
    {
        /*
         * This is a \n after a \r. The pair is regarded as just one
         * newline, which has already been counted. Get the next character.
         */
        ch = (*p->p_file->f_type->ft_getch)(p->p_file->f_file);
        if (ch != '\n' && ch != '\r')
        {
            p->p_sol = 0;
        }
        else
        {
            p->p_cr = (ch == '\r');
            ++p->p_lineno;
            ch = '\n';
        }
    }
    else
    {
        /* A newline in its own right, whichever way it was spelt. */
        p->p_cr = (ch == '\r');
        p->p_sol = 1;
        ++p->p_lineno;
        ch = '\n';
    }

    if (p->p_depth > 0 && a != NULL)
    {
        /*
         * We are inside a compound statement and a code array is being
         * built. Keep the trailing source marker up to date, adding one
         * if the array does not currently end with one.
         */
        if (a->a_top > a->a_base && issrc(a->a_top[-1]))
        {
            srcof(a->a_top[-1])->s_lineno = p->p_lineno;
        }
        else if (pushcheck(a, 1) == 0)
        {
            *a->a_top = objof(new_src(p->p_lineno, p->p_file->f_name));
            if (*a->a_top != NULL)
            {
                loose(*a->a_top);
                ++a->a_top;
            }
        }
    }

#ifndef NOTRACE
    if (ungotten)
    {
        /* This character was traced before it was pushed back. */
        ungotten = 0;
    }
    else if (trace_yes && (trace_flags & TRACE_LEXER) && ch != EOF)
    {
        fprintf(stderr, "%c", ch);
        if (ch == '\n')
            fprintf(stderr,"trace: ");
    }
#endif
    return ch;
}
/*
 * Push the character c back onto the file being parsed by p, through the
 * file type's ft_ungetch function. If c is a newline the line count is
 * wound back by one to compensate for the increment get() made.
 *
 * Unless NOTRACE is defined, this also sets ungotten so that the next
 * get() does not produce trace output.
 */
void
unget(p, c)
parse_t *p;
int c;
{
    (*p->p_file->f_type->ft_ungetch)(c, p->p_file->f_file);
    if ('\n' == c)
    {
        p->p_lineno -= 1;
    }
#ifndef NOTRACE
    ungotten = 1;
#endif
}
/*
 * Read the next lexical token from the parse stream p.
 *
 * The token type is stored in p->p_got.t_what and is also the return
 * value. Tokens that carry a value set one more member of p->p_got:
 *
 *   T_INT      t_int    integer literal or '.' character constant
 *   T_FLOAT    t_float  floating point literal
 *   T_STRING   t_obj    result of new_name() on the string contents
 *   T_REGEXP   t_obj    result of new_regexp() on the text between #...#
 *   T_NAME     t_obj    result of new_cname() on the identifier
 *
 * On any failure T_ERROR is stored and returned, and p->p_got.t_str is
 * set to the current value of the global error.
 *
 * White space, newlines, C style comments and lines whose first character
 * is '#' are skipped before the token. The array a is not used here other
 * than being handed on to get().
 */
int
lex(p, a)
parse_t *p;
array_t *a;
{
    register int c;
    register int t = 0; /* init to shut up compiler */
    register int i;
    register int fstate;
    char *s;
    long l;
    double d;

    /*
     * Skip white space, in its various forms.
     */
    for (;;)
    {
        /* Note whether we were at start of line before reading c. */
        i = p->p_sol;
        if ((c = get(p, a)) == '#' && i)
        {
            /* A '#' as first character of a line; discard the whole line. */
            while ((c = get(p, a)) != '\n' && c != EOF)
                ;
            continue;
        }
        else if (c == '\n')
            continue;
        if (c == '/')
        {
            if ((c = get(p, a)) != '*')
            {
                /* Not a comment after all; it is a '/' or '/=' token. */
                unget(p, c);
                goto slash;
            }
            /*
             * A comment.
             */
            while ((c = get(p, a)) != EOF)
            {
                if (c == '*')
                {
                    if ((c = get(p, a)) == '/')
                        break;
                    /* Put it back; it may be the '*' of the real end. */
                    unget(p, c);
                }
            }
            continue;
        }
        if (c != ' ' && c != '\t')
            break;
    }

    /*
     * Decipher the next token.
     */
    switch (c)
    {
    case '/':
    slash:
        if ((c = get(p, a)) == '=')
            t = T_SLASHEQ;
        else
        {
            unget(p, c);
            t = T_SLASH;
        }
        break;

    case EOF:
        t = T_EOF;
        break;

    case '$':
        t = T_DOLLAR;
        break;

    case '@':
        t = T_AT;
        break;

    case '(':
        t = T_ONROUND;
        break;

    case ')':
        t = T_OFFROUND;
        break;

    case '{':
        t = T_ONCURLY;
        break;

    case '}':
        t = T_OFFCURLY;
        break;

    case ',':
        t = T_COMMA;
        break;

    case '~':
        if ((c = get(p, a)) == '~')
        {
            if ((c = get(p, a)) == '~')
                t = T_3TILDE;
            else if (c == '=')
                t = T_2TILDEEQ;
            else
            {
                unget(p, c);
                t = T_2TILDE;
            }
        }
        else
        {
            unget(p, c);
            t = T_TILDE;
        }
        break;

    case '[':
        t = T_ONSQUARE;
        break;

    case ']':
        t = T_OFFSQUARE;
        break;

    case '.':
        if ((c = get(p, a)) >= '0' && c <= '9')
        {
            /* A number with no integer part, such as .5 */
            unget(p, c);
            c = '.';
            i = 0;
            goto alphanum;
        }
        unget(p, c);
        t = T_DOT;
        break;

    case '*':
        if ((c = get(p, a)) == '=')
            t = T_ASTERIXEQ;
        else
        {
            unget(p, c);
            t = T_ASTERIX;
        }
        break;

    case '%':
        if ((c = get(p, a)) == '=')
            t = T_PERCENTEQ;
        else
        {
            unget(p, c);
            t = T_PERCENT;
        }
        break;

    case '^':
        if ((c = get(p, a)) == '=')
            t = T_CARETEQ;
        else
        {
            unget(p, c);
            t = T_CARET;
        }
        break;

    case '+':
        if ((c = get(p, a)) == '=')
            t = T_PLUSEQ;
        else if (c == '+')
            t = T_PLUSPLUS;
        else
        {
            unget(p, c);
            t = T_PLUS;
        }
        break;

    case '-':
        if ((c = get(p, a)) == '>')
            t = T_PTR;
        else if (c == '=')
            t = T_MINUSEQ;
        else if (c == '-')
            t = T_MINUSMINUS;
        else
        {
            unget(p, c);
            t = T_MINUS;
        }
        break;

    case '>':
        if ((c = get(p, a)) == '>')
        {
            if ((c = get(p, a)) == '=')
                t = T_GRTGRTEQ;
            else
            {
                unget(p, c);
                t = T_GRTGRT;
            }
        }
        else if (c == '=')
            t = T_GRTEQ;
        else
        {
            unget(p, c);
            t = T_GRT;
        }
        break;

    case '<':
        if ((c = get(p, a)) == '<')
        {
            if ((c = get(p, a)) == '=')
                t = T_LESSLESSEQ;
            else
            {
                unget(p, c);
                t = T_LESSLESS;
            }
        }
        else if (c == '=')
        {
            if ((c = get(p, a)) == '>')
                t = T_LESSEQGRT;
            else
            {
                unget(p, c);
                t = T_LESSEQ;
            }
        }
        else
        {
            unget(p, c);
            t = T_LESS;
        }
        break;

    case '=':
        if ((c = get(p, a)) == '=')
            t = T_EQEQ;
        else
        {
            unget(p, c);
            t = T_EQ;
        }
        break;

    case '!':
        if ((c = get(p, a)) == '=')
            t = T_EXCLAMEQ;
        else if (c == '~')
            t = T_EXCLAMTILDE;
        else
        {
            unget(p, c);
            t = T_EXCLAM;
        }
        break;

    case '&':
        if ((c = get(p, a)) == '&')
            t = T_ANDAND;
        else if (c == '=')
            t = T_ANDEQ;
        else
        {
            unget(p, c);
            t = T_AND;
        }
        break;

    case '|':
        if ((c = get(p, a)) == '|')
            t = T_BARBAR;
        else if (c == '=')
            t = T_BAREQ;
        else
        {
            unget(p, c);
            t = T_BAR;
        }
        break;

    case ';':
        t = T_SEMICOLON;
        break;

    case '?':
        t = T_QUESTION;
        break;

    case ':':
        t = T_COLON;
        break;

    case '#':
        /*
         * A '#' that was not the first character of its line: a regular
         * expression of the form #...#.
         */
        i = 0;
        while ((c = get(p, a)) != '#' && c != '\n' && c != EOF)
        {
            if (chkbuf(i))
                goto fail;
            buf[i++] = c;
        }
        if (c == '\n')
        {
            error = "newline in #...#";
            goto fail;
        }
        /*
         * Make sure there is room for the terminator. The loop above may
         * not have run at all (an empty ## expression), in which case
         * nothing so far has guaranteed that buf is even allocated.
         */
        if (chkbuf(i))
            goto fail;
        buf[i] = '\0';
        if ((p->p_got.t_obj = objof(new_regexp(buf))) == NULL)
            goto fail;
        t = T_REGEXP;
        break;

    case '\'':
        /* A character constant; it shares the escape handling of strings. */
        t = T_INT;
        goto chars;

    case '\"':
        t = T_STRING;
    chars:
        i = 0;
        while ((c = get(p, a)) != (t == T_INT ? '\'' : '"') && c != '\n' && c!=EOF)
        {
            if (chkbuf(i))
                goto fail;
            if (c == '\\')
            {
                switch (c = get(p, a))
                {
                /* Backslash-newline: drop both and carry on with the loop. */
                case '\n': continue;
                case 'n': c = '\n'; break;
                case 't': c = '\t'; break;
                case 'v': c = '\v'; break;
                case 'b': c = '\b'; break;
                case 'r': c = '\r'; break;
                case 'f': c = '\014'; break;
                case 'a': c = '\007'; break;
                case 'e': c = '\033'; break;
                case '\\': break;
                case '\'': break;
                case '"': break;
                case '?': break;

                case 'c':
                    /* \cX: the low five bits of the following character. */
                    c = get(p, a) & 0x1F;
                    break;

                case 'x':
                    /* \x followed by any number of hex digits. */
                    l = 0;
                    while (((c = get(p, a)) >= '0' && c <= '9')
                        || (c >= 'a' && c <= 'f')
                        || (c >= 'A' && c <= 'F'))
                    {
                        if (c >= 'a' && c <= 'f')
                            c -= 'a' - 10;
                        else if (c >= 'A' && c <= 'F')
                            c -= 'A' - 10;
                        else
                            c -= '0';
                        l = l * 16 + c;
                    }
                    unget(p, c);
                    c = l;
                    break;

                default:
                    if (c >= '0' && c <= '7')
                    {
                        /* One to three octal digits. */
                        l = c - '0';
                        if ((c = get(p, a)) >= '0' && c <= '7')
                        {
                            l = l * 8 + c - '0';
                            if ((c = get(p, a)) >= '0' && c <= '7')
                                l = l * 8 + c - '0';
                            else
                                unget(p, c);
                        }
                        else
                            unget(p, c);
                        c = l;
                    }
                    else
                    {
                        error = "unknown \\ escape";
                        goto fail;
                    }
                }
            }
            buf[i++] = c;
            if (t == T_INT)
            {
                /* A character constant holds exactly one character. */
                if (get(p, a) != '\'')
                {
                    error = "too many chars in ' ' sequence";
                    goto fail;
                }
                break;
            }
        }
        if (chkbuf(i))
            goto fail;
        buf[i] = '\0';
        if (t == T_INT)
        {
            if (i == 0)
            {
                error = "newline in ' '";
                goto fail;
            }
            p->p_got.t_int = buf[0] & 0xFF;
        }
        else
        {
            if (c == '\n')
            {
                error = "newline in \"...\"";
                goto fail;
            }
            if ((p->p_got.t_obj = objof(new_name(buf, i))) == NULL)
                goto fail;
        }
        break;

    default:
        if
        (
            (c < '0' || c > '9')
            &&
            (c < 'a' || c > 'z')
            &&
            (c < 'A' || c > 'Z')
            &&
            c != '_'
            &&
            c != '.'
        )
        {
            error = "lexical error";
            goto fail;
        }
        /*
         * States to keep track of passage through a floating point number.
         * ddd[.ddd][e|E[+|-]ddd]
         */
#define FS_NOTF 0
#define FS_ININT 1
#define FS_INFRAC 2
#define FS_POSTE 3
#define FS_INEXP 4
        i = 0;
    alphanum:
        fstate = c=='.' ? FS_INFRAC : c>='0' && c<='9' ? FS_ININT : FS_NOTF;
        for (;;)
        {
            if (chkbuf(i))
                goto fail;
            buf[i++] = c;
            c = get(p, a);
            /*
             * Decide whether c extends the token. A continue accepts it;
             * leaving the switch ends the token.
             */
            switch (fstate)
            {
            case FS_POSTE:
                if ((c >= '0' && c <= '9') || c == '+' || c == '-')
                {
                    fstate = FS_INEXP;
                    continue;
                }
                goto notf;

            case FS_ININT:
                if (c == '.')
                {
                    fstate = FS_INFRAC;
                    continue;
                }
                /* fall through */
            case FS_INFRAC:
                if (c == 'e' || c == 'E')
                {
                    fstate = FS_POSTE;
                    continue;
                }
                /* fall through */
            case FS_INEXP:
                if (c >= '0' && c <= '9')
                    continue;
                /* fall through */
            notf:
                fstate = FS_NOTF;
                /* fall through */
            case FS_NOTF:
                if
                (
                    (c >= '0' && c <= '9')
                    ||
                    (c >= 'a' && c <= 'z')
                    ||
                    (c >= 'A' && c <= 'Z')
                    ||
                    c == '_'
                )
                    continue;
                break;
            }
            break;
        }
        unget(p, c);
        /*
         * Failing to get room for the terminator is an error like any
         * other buffer failure; it must not fall out of the switch and
         * be returned as if it were a token.
         */
        if (chkbuf(i))
            goto fail;
        buf[i] = '\0';
        /*
         * Only something that starts with a digit or a '.' can be a
         * number. Without this test identifiers are handed to strtod(),
         * and some libraries accept words there (C99 and later take
         * "inf", "infinity" and "nan"), which would turn such names into
         * floats.
         */
        if ((buf[0] >= '0' && buf[0] <= '9') || buf[0] == '.')
        {
            l = strtol(buf, &s, 0);
            if (*s == '\0')
            {
                p->p_got.t_int = l;
                t = T_INT;
                break;
            }
            d = strtod(buf, &s);
            if (*s == '\0')
            {
                p->p_got.t_float = d;
                t = T_FLOAT;
                break;
            }
        }
        /* Anything else made of these characters is a name. */
        if ((p->p_got.t_obj = objof(new_cname(buf))) == NULL)
            goto fail;
        t = T_NAME;
        break;
    }
    p->p_got.t_what = t;
    return t;

fail:
    p->p_got.t_what = T_ERROR;
    p->p_got.t_str = error;
    return T_ERROR;
}